*******************************************************************************
**
** This script cleans the case mix files from CMS and saves to a hospital-year panel 
**
*******************************************************************************
 


* Start from a clean session: close any log left open and drop everything in memory
capture log close 
clear all 


* Input and output directories.
* Note: the input directory must contain one sub-folder per year of data.
local fpath_input "/disk/agedisk4/medicare.work/sacarny-DUA51934/shruthi-dua51934/replication_files/build/casemix/input"

local fpath_output "/disk/agedisk4/medicare.work/sacarny-DUA51934/shruthi-dua51934/replication_files/build/casemix/output"

* Record this run in a log file in the output directory
log using "`fpath_output'/cmi_files.log", replace 

* Switch: 1 = (re)build the .dta files from the raw .txt files for 2007-2011,
*         0 = skip that step
global MAKEDATA 1 

* First and last file year to include in the panel.
* NOTE: the file for year Y reflects cases from Y-2
global STARTYEAR 2005
global ENDYEAR 2016

* Convert the raw .txt files for 2007-2011 into Stata .dta files, saved next to
* the source file in each year's input folder. Column 1 is forced to be read as
* a string (presumably the provider number -- confirm against the raw files).
* The whole step is skipped when $MAKEDATA is 0.
if $MAKEDATA {
	forvalues y = 2007(1)2011 {
		di "Processing txt file for `y'"
		import delimited "`fpath_input'/`y'/casemix`y'.txt", stringcols(1) varnames(1)  clear
		if `y' == 2008 {
			* In the 2008 file "cases" is read as text containing commas.
			* Remove EVERY comma (count = .), not just the first one, so that
			* values with more than one separator (e.g. 1,234,567) still
			* convert to numeric instead of being left as strings by destring.
			replace cases = subinstr(cases, ",", "", .)
			destring cases, replace
		}
		desc 
		save "`fpath_input'/`y'/casemix`y'.dta", replace 
	}
}

* Standardize the Stata file for each year and save it as a per-year temporary
* file (temp<year>) in the output directory. Each temporary file holds the
* provider number (pn), the case mix index (casemixindex) and the file year.
forvalues y = $STARTYEAR/$ENDYEAR { 
	
	* The unadjusted CMI variable has a different name depending on the year.
	* The branches are mutually exclusive; a year with no known mapping stops
	* the script instead of silently reusing the name left over from the
	* previous iteration (or an empty name on the first iteration).
	if (`y' == 2006) | (inrange(`y', 2009, 2011)) {
		local cmivar "cmi" 
	}
	else if inrange(`y', 2007, 2008) {
		local cmivar "unadjustedcmi"
	}
	else if inrange(`y', 2012, 2016) | (`y' == 2005) {
		local cmivar "casemixindex"
	}
	else {
		di as error "No case mix index variable name is defined for year `y'"
		exit 198
	}
	
	* Load only the two variables that are needed
	use provider `cmivar' using "`fpath_input'/`y'/casemix`y'.dta", clear 

	rename provider pn 
	rename `cmivar' casemixindex
	gen year = `y'	
	drop if missing(casemixindex)
	* Stop if a provider appears more than once within a year
	isid pn 
	save "`fpath_output'/temp`y'", replace 
	
}

 
* Start from an empty dataset and stack the per-year temporary files
clear 
forvalues yr = $STARTYEAR(1)$ENDYEAR {
	append using "`fpath_output'/temp`yr'" 	
}

* The file for year Y reflects cases from Y-2, so shift year to the year of the cases
replace year = year - 2 
label variable casemixindex "Unadjusted case mix index"
sort pn year 

* Save the final hospital-year panel
save "`fpath_output'/casemix_processed.dta", replace 


* Close the log opened at the top of the script
log close 









